Data Visualization using ggplot

Alice Kamau | Ken Mwai | Mark Otiende

6/5/2021

Learning objectives

Building your plots iteratively

# Attach the tidyverse, which provides ggplot2 for every plot below.
library(tidyverse)
# NOTE(review): the relative paths used in this script ("Data/...",
# "Output/...") are resolved against the home directory because of this call.
# Changing the working directory inside a script is fragile; consider an
# RStudio project or project-relative paths instead.
setwd("~")
# Import the birth-weight data set and list its column names.
bw_df <- read.csv("Data/birthweight2.csv")
colnames(bw_df)
##  [1] "id"      "matage"  "ht"      "gestwks" "sex"    
##  [6] "bweight" "ethnic"  "lbw"     "agegrp"  "lbw2"   
## [11] "agegrp1"

Boxplot - for a categorical and continuous variable

Let's do a box plot.

# Initialise a plot bound to the data; no aesthetics or layers are added yet.
ggplot(bw_df)

Adding aesthetics and labels

# Box plot of birth weight by sex, with one fill colour per sex.
# The title is written out in full ("Birth weight", not "B weight") so that it
# matches the titles of the other box and violin plots in this script.
ggplot(data = bw_df) +
  geom_boxplot(aes(x = sex, y = bweight, fill = sex)) +
  labs(x = "Sex", y = "Birth Weight", title = "Birth weight vs Sex")

Box plot and add scatter

# Box plot of birth weight by sex with the raw observations overlaid.
# The aesthetics are declared once in ggplot() so both layers inherit them.
ggplot(bw_df, aes(x = sex, y = bweight, fill = sex)) +
  geom_boxplot() +
  geom_point() +
  labs(x = "Sex", y = "Birth Weight", title = "Birth weight vs Sex")

Box plot and add scatter points that are jittered

# Box plot of birth weight by sex with jittered observations on top.
# geom_jitter() already draws the points (it is geom_point() with
# position = "jitter"), so an additional geom_point() layer would plot every
# observation a second time at its unjittered position.
ggplot(data = bw_df, mapping = aes(y = bweight, x = sex, fill = sex)) +
  geom_boxplot() +
  geom_jitter() +
  ylab("Birth Weight") +
  xlab("Sex") +
  ggtitle("Birth weight vs Sex")

Violin plot

# Violin plot of birth weight by sex with jittered observations on top.
# Only geom_jitter() is used for the points: combining it with geom_point()
# would draw each observation twice (once jittered, once not).
ggplot(data = bw_df, mapping = aes(y = bweight, x = sex, fill = sex)) +
  geom_violin() +
  geom_jitter() +
  ylab("Birth Weight") +
  xlab("Sex") +
  ggtitle("Birth weight vs Sex")

Scatter plot with ggplot2 - for two continuous variables

# Step 1: declare the data and the aesthetic mappings only (no geoms yet).
# Gestation weeks go on x, birth weight on y, and colour is mapped to sex.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex))

Add a point geom

# Step 2: same data and aesthetics, now with a point layer (one point per row).
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point()

Add a smooth geom

# Step 3: points plus a smoothed trend line.
# geom_smooth() is called with its defaults, so ggplot2 picks the smoothing
# method itself and reports the choice in the console message.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Make the smooth geom straight

# Step 4: points plus a straight trend line.
# method = "lm" replaces the default smoother with a linear-model fit.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

Faceting

Facet by sex

# Points and a linear trend line, split into one panel per sex.
# ncol = 2 lays the panels out side by side.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(sex), ncol = 2)
## `geom_smooth()` using formula 'y ~ x'

Add labels

# Faceted scatter plot with a linear trend line and full annotation:
# axis labels, legend title, plot title, subtitle and caption.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(sex), ncol = 2) +
  labs(
    x = "Gestation weeks",
    y = "Birthweight",
    color = "Sex",
    title = "Lower gestation weeks leads to low birthweight",
    subtitle = "Birth weight is in grams",
    caption = "Is there something we can observe by gender"
  )
## `geom_smooth()` using formula 'y ~ x'

Adding ggplot2 themes

## `geom_smooth()` using formula 'y ~ x'

Exporting plots

# Build the final faceted scatter plot and keep it in an object so that it can
# be both written to disk and printed.
my_plot2 <- ggplot(
  data = bw_df,
  mapping = aes(x = gestwks, y = bweight, color = sex)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(sex), ncol = 2) +
  labs(
    x = "Gestation weeks",
    y = "Birthweight",
    color = "Sex",
    title = "Lower gestation weeks leads to low birthweight",
    subtitle = "Birth weight is in grams",
    caption = "Is there something we can observe by gender"
  ) +
  # Black-and-white base theme, then override individual text sizes.
  theme_bw() +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10)
  ) +
  scale_color_discrete(name = "Sex")

# The export fails if the target directory is missing, so create "Output"
# (relative to the current working directory) before saving.
if (!dir.exists("Output")) {
  dir.create("Output", recursive = TRUE)
}
# The PDF device is inferred from the file extension; width and height use
# ggsave()'s default unit.
ggsave("Output/Bweight.pdf", my_plot2, width = 15, height = 10)
## `geom_smooth()` using formula 'y ~ x'
my_plot2
## `geom_smooth()` using formula 'y ~ x'

Break out session - Exercises

# Exercise reference plot: birth weight against gestation weeks, coloured and
# faceted by sex, with a linear trend line, full labelling and a customised
# black-and-white theme.
ggplot(bw_df, aes(x = gestwks, y = bweight, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(vars(sex), ncol = 2) +
  labs(
    x = "Gestation weeks",
    y = "Birthweight",
    color = "Sex",
    title = "Lower gestation weeks leads to low birthweight",
    subtitle = "Birth weight is in grams",
    caption = "Is there something we can observe by gender"
  ) +
  theme_bw() +
  # Text-size overrides applied on top of theme_bw().
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10)
  ) +
  scale_color_discrete(name = "Sex")
## `geom_smooth()` using formula 'y ~ x'

Solution

## `geom_smooth()` using formula 'y ~ x'